# %% [markdown]
"""
Why not just use Medicare?
"""
# %%
samples = pd.read_sql(f"""
WITH
opps AS (
SELECT
distinct
apc,
cpt
FROM hive.cld_utils.ref_cms_opps_pricing_2025_04_10 opps
JOIN tq_dev.internal_dev_csong_cld_v1_0.tmp_ref_provider_spine_2025_02 ps
ON CAST(opps.provider_id AS VARCHAR) = ps.provider_id
WHERE effective_date = (
SELECT max(effective_date)
FROM hive.cld_utils.ref_cms_opps_pricing_2025_04_10
)
)
SELECT apc, count(distinct billing_code) as n
FROM tq_dev.internal_dev_csong_cld_v1_0.prod_combined_all a
JOIN opps
ON a.billing_code = opps.cpt
WHERE provider_type LIKE '%Hospital%'
GROUP BY 1
ORDER BY 2 DESC
LIMIT 1000
""", con=trino_conn)
# %%
samples.head(30)
# %%
apc = '5113'
# %%
df = pd.read_sql(f"""
WITH
opps AS (
SELECT
distinct
apc,
cpt
FROM hive.cld_utils.ref_cms_opps_pricing_2025_04_10 opps
JOIN tq_dev.internal_dev_csong_cld_v1_0.tmp_ref_provider_spine_2025_02 ps
ON CAST(opps.provider_id AS VARCHAR) = ps.provider_id
WHERE effective_date = (
SELECT max(effective_date)
FROM hive.cld_utils.ref_cms_opps_pricing_2025_04_10
)
)
SELECT
provider_id,
billing_code,
apc,
medicare_rate,
canonical_rate
FROM tq_dev.internal_dev_csong_cld_v1_0.prod_combined_all
JOIN opps
ON billing_code = cpt
WHERE provider_type LIKE '%Hospital%'
AND canonical_rate IS NOT NULL
AND canonical_rate_score = 5
AND canonical_rate between lower_bound AND upper_bound
AND apc = '{apc}'
ORDER BY RANDOM()
LIMIT 20000
""", con=trino_conn)
df
# %%
# pick 30 random codes
codes = df['billing_code'].sample(40, random_state=1)
df_plot = df[df['billing_code'].isin(codes)].copy()
# melt to long form
df_long = df_plot.melt(
id_vars='billing_code',
value_vars=['canonical_rate', 'medicare_rate'],
var_name='rate_type',
value_name='rate'
)
order = (
df_plot
.groupby('billing_code')['canonical_rate']
.median()
.sort_values()
.index
)
sns.set_theme(rc={"figure.figsize": (14, 12)})
ax = sns.boxplot(
x='billing_code',
y='rate',
hue='rate_type',
data=df_long.sort_values('rate'),
palette=['skyblue','red'],
order=order,
showfliers=False
)
plt.title(f'Rates by Billing Code (APC {apc})')
plt.xlabel('Billing Code')
plt.ylabel('Rate')
plt.xticks(rotation=90)
# ax.set_ylim(0, df['canonical_rate'].quantile(0.95))
plt.legend(title='Rate Type', loc='upper right')
plt.tight_layout()
plt.show()